
*****************************************************************************
* Correspondence between PCS (Professions and Socio-professional Categories)
* and ROME (Operational Directory of Trades)
*
* Reads the "Table" sheet of the correspondence workbook and saves
* correspondance_pcs_rome.dta with one row per distinct (pcs, code_rome) pair.
clear  // Clear all data and variables from memory

* Import Excel file with PCS/ROME correspondence, ensuring all data is imported as strings
import excel using "${input_stata}\data_Pole-Emploi\methodo pole emploi\fap2009_pcs2003_romev3-1.xls", ///
firstrow allstring clear sheet("Table")

* Shorten the column names taken from the first row of the sheet
rename (FAP Famillesprofessionnelles PCS Professionsetcatégoriessociop ROME Qualification RépertoireOpérationneldesMéti) ///
       (FAP Famillesprofessionnelles pcs libel_pcs code_rome qualification libel_rome)

keep pcs libel_pcs code_rome libel_rome

* Drop rows that carry neither a PCS code nor a ROME code
drop if pcs == "" & code_rome == ""

* Fill in missing values by carrying the previous value forward within the dataset.
* replace works through the observations in order, so a run of consecutive blanks
* is filled from the last non-blank row above it. This relies on the current row
* order, i.e. the order of the rows in the sheet.
replace pcs = pcs[_n-1] if pcs == ""
replace libel_pcs = libel_pcs[_n-1] if libel_pcs == ""
replace code_rome = code_rome[_n-1] if code_rome == ""
replace libel_rome = libel_rome[_n-1] if libel_rome == ""

* Keep a single row per (pcs, code_rome) pair
duplicates drop pcs code_rome, force

* Drop any remaining empty codes (rows that had nothing above them to carry forward)
drop if code_rome == ""
drop if pcs == ""

* Only the two codes are saved; the labels are no longer needed
drop lib*
order pcs code_rome

* No variable named dup is created anywhere in this section, so sorting on it and
* dropping it would stop the do-file with "variable dup not found". Sort on the
* (pcs, code_rome) key instead, which is unique after the duplicates drop above.
sort pcs code_rome

* Save the cleaned dataset
save "${input_stata}\data_Pole-Emploi\dta\correspondance_pcs_rome.dta", replace

*****************************************************************************
* Employment basins: commune-to-basin lookup table
clear

* Load the basin workbook with every column imported as a string
import excel using "${input_stata}\data_Pole-Emploi\methodo pole emploi\BASSINS_ BMO_2013_envoi_site.xls", firstrow allstring clear

* Give the imported columns short names, one at a time
rename Codecommune       code_commune
rename Codebassindemploi code_bassin
rename LibelléCommune    libel_city
rename Codedépartement   dep

* Commune number: the 3 characters starting at position 3 of the commune code
* (assumes code_commune is a 5-character code whose first 2 characters are the
* department -- TODO confirm against the workbook)
generate com = substr(code_commune, 3, 3)

* Department codes longer than 2 characters are cut down to their first 2 characters
replace dep = substr(dep, 1, 2) if length(dep) > 2

* Keep the identifiers only, store com as a float, and leave one row per
* (dep, com, code_bassin) combination
keep dep com code_commune code_bassin
destring com, float replace
duplicates drop dep com code_bassin, force

save "${input_stata}\data_Pole-Emploi\dta\correspondance_bassin_ville.dta", replace

*****************************************************************************
* Combine the 2010, 2011 and 2012 sheets with the existing 2013 extract
*
* Starting point: the 2013 extract already stored as a .dta file
use "${input_stata}\data_Pole-Emploi\dta\pole-emploi-extract-july18-2013.dta", clear

* Strip every space from the basin name
replace bassin = subinstr(bassin, " ", "", .)

* Write the result to the file that accumulates all years
save "${input_stata}\data_Pole-Emploi\dta\pole-emploi-2010-2013.dta", replace

**********************************************************
* 2012: read the "2012" sheet and stack it on top of the accumulated file
import excel using "${input_stata}\data_Pole-Emploi\2010 et 2011 et 2012.xls", sheet("2012") firstrow allstring clear

* Harmonise the column names of this sheet
rename codebassin code_bassin
rename Bassin     bassin
rename codeROME   code_rome
rename ROME       libel_metier
rename Région     region

* Tag the rows with their year (kept as a string at this stage)
generate year = "2012"

* Keep the common set of columns, then strip every space from the basin name
keep code_bassin code_rome libel_metier year region bassin
replace bassin = subinstr(bassin, " ", "", .)

* Add the rows accumulated so far and overwrite the accumulated file
append using "${input_stata}\data_Pole-Emploi\dta\pole-emploi-2010-2013.dta"
save "${input_stata}\data_Pole-Emploi\dta\pole-emploi-2010-2013.dta", replace

***********************************************************
* 2011: read the "2011" sheet and stack it on top of the accumulated file
clear
import excel using "${input_stata}\data_Pole-Emploi\2010 et 2011 et 2012.xls", sheet("2011") firstrow allstring clear

* Harmonise the column names of this sheet
rename Codebassin  code_bassin
rename Bassin      bassin
rename romev3      code_rome
rename LibelléRome libel_metier
rename Région      region

* Tag the rows with their year (kept as a string at this stage)
generate year = "2011"

* Strip every space from the basin name and keep the common set of columns
replace bassin = subinstr(bassin, " ", "", .)
keep code_bassin code_rome libel_metier year region bassin

* Add the rows accumulated so far and overwrite the accumulated file
append using "${input_stata}\data_Pole-Emploi\dta\pole-emploi-2010-2013.dta"
save "${input_stata}\data_Pole-Emploi\dta\pole-emploi-2010-2013.dta", replace

***********************************************************
* 2010: read the "2010" sheet, stack it on the accumulated file and finalise
clear
import excel using "${input_stata}\data_Pole-Emploi\2010 et 2011 et 2012.xls", sheet("2010") firstrow allstring clear

* Harmonise the column names of this sheet
rename Codebassin  code_bassin
rename Bassin      bassin
rename romev3      code_rome
rename LibelléRome libel_metier
rename Région      region

* Tag the rows with their year (kept as a string at this stage)
generate year = "2010"

* Keep the common set of columns, then strip every space from the basin name
keep code_bassin code_rome libel_metier year region bassin
replace bassin = subinstr(bassin, " ", "", .)

* Add the rows accumulated so far
append using "${input_stata}\data_Pole-Emploi\dta\pole-emploi-2010-2013.dta"

* Quick check of the number of rows per year
tab year

* Convert year to numeric; with force, any value that is not a number becomes missing
destring year, replace force

* index is not created in this script -- presumably it comes from the 2013 extract
drop index

* Store the combined dataset
save "${input_stata}\data_Pole-Emploi\dta\pole-emploi-2010-2013.dta", replace

* Build a basin-name -> basin-code lookup from the 2013 rows only
keep if year == 2013
keep bassin code_bassin
* One row per basin name; with force, the first row met for each name is the one kept
duplicates drop bassin, force
save "${input_stata}\data_Pole-Emploi\dta\bassin.dta", replace

* Reload the combined dataset and give every year the 2013 basin codes
use "${input_stata}\data_Pole-Emploi\dta\pole-emploi-2010-2013.dta", clear

* Cross-tabulation of basin names by year, for visual inspection
tab bassin year

* Blank out the basin codes of the years before 2013 so that the merge below can
* fill them in: with the update option, missing values in memory are replaced by
* the values found in bassin.dta for the same basin name
replace code_bassin = "" if year < 2013
merge m:1 bassin using "${input_stata}\data_Pole-Emploi\dta\bassin.dta", update

* Discard rows that still have no basin code after the merge
keep if code_bassin != ""

* _merge is left in the dataset here; it is dropped further down in the script
sort year region code_bassin code_rome
save "${input_stata}\data_Pole-Emploi\dta\pole-emploi-2010-2013.dta", replace

*****************************************************************************
* Aggregate to one row per (code_bassin, pcs, year)
* NOTE(review): this section used to be headed "Export data to CSV", but it only
* saves a .dta file -- confirm whether a CSV export is still expected.
use "${input_stata}\data_Pole-Emploi\dta\pole-emploi-2010-2013.dta", clear

* Form every pairing of a row with the PCS codes attached to its ROME code
joinby code_rome using "${input_stata}\data_Pole-Emploi\dta\correspondance_pcs_rome.dta"

* Keep rows that have a PCS code
keep if pcs != ""

* Drop the ROME-level columns and the _merge variable left by the earlier merge
drop libel_metier region bassin code_rome
drop _merge

* n = number of joined rows in each (year, code_bassin, pcs) group
sort year code_bassin pcs
by year code_bassin pcs: generate n = _N

* Reduce each group to a single row
duplicates drop code_bassin pcs year, force
sort code_bassin pcs year

* Overwrite the combined file with the aggregated dataset
save "${input_stata}\data_Pole-Emploi\dta\pole-emploi-2010-2013.dta", replace

